/*
 * This file is part of ELKI:
 * Environment for Developing KDD-Applications Supported by Index-Structures
 *
 * Copyright (C) 2019
 * ELKI Development Team
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package elki.outlier.clustering;

import elki.clustering.em.EM;
import elki.data.Clustering;
import elki.data.NumberVector;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.datastore.DataStoreFactory;
import elki.database.datastore.DataStoreUtil;
import elki.database.datastore.WritableDoubleDataStore;
import elki.database.ids.DBIDIter;
import elki.database.relation.DoubleRelation;
import elki.database.relation.MaterializedDoubleRelation;
import elki.database.relation.Relation;
import elki.outlier.OutlierAlgorithm;
import elki.result.Metadata;
import elki.result.outlier.OutlierResult;
import elki.result.outlier.OutlierScoreMeta;
import elki.result.outlier.ProbabilisticOutlierScore;
import elki.utilities.ClassGenericsUtil;
import elki.utilities.datastructures.iterator.It;
import elki.utilities.documentation.Description;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.parameterization.Parameterization;

/**
 * Outlier detection algorithm using EM Clustering.
 * <p>
 * If an object does not belong to any cluster it is supposed to be an outlier.
 * If the probability for an object to belong to the most probable cluster is
 * still relatively low this object is an outlier.
 * 
 * @author Lisa Reichert
 * @since 0.3
 *
 * @has - - - EM
 *
 * @param <V> Vector type
 */
// TODO: Allow using an existing EM result
@Title("EM Outlier: Outlier Detection based on the generic EM clustering")
@Description("The outlier score assigned is based on the highest cluster probability obtained from EM clustering.")
public class EMOutlier<V extends NumberVector> implements OutlierAlgorithm {
  /**
   * Inner algorithm.
   */
  private EM<V, ?> emClustering;

  /**
   * Constructor with an existing EM clustering algorithm.
   * 
   * @param emClustering EM clustering algorithm to use.
   */
  public EMOutlier(EM<V, ?> emClustering) {
    super();
    this.emClustering = emClustering;
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
  }

  /**
   * Runs the algorithm in the timed evaluation part.
   * 
   * @param relation Relation to process
   * @return Outlier result
   */
  public OutlierResult run(Relation<V> relation) {
    emClustering.setSoft(true);
    Clustering<?> emresult = emClustering.run(relation);
    Relation<double[]> soft = null;
    for(It<Relation<double[]>> iter = Metadata.hierarchyOf(emresult).iterChildren().filter(Relation.class); iter.valid(); iter.advance()) {
      if(iter.get().getDataTypeInformation() == EM.SOFT_TYPE) {
        soft = iter.get();
      }
    }
    if(soft == null) {
      throw new IllegalStateException("No soft assignment generated by EM?");
    }

    double globmax = 0.0;
    WritableDoubleDataStore emo_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      double maxProb = Double.POSITIVE_INFINITY;
      double[] probs = soft.get(iditer);
      for(double prob : probs) {
        maxProb = Math.min(1. - prob, maxProb);
      }
      emo_score.putDouble(iditer, maxProb);
      globmax = Math.max(maxProb, globmax);
    }
    DoubleRelation scoreres = new MaterializedDoubleRelation("EM outlier scores", relation.getDBIDs(), emo_score);
    OutlierScoreMeta meta = new ProbabilisticOutlierScore(0.0, globmax);
    // combine results.
    OutlierResult result = new OutlierResult(meta, scoreres);
    // TODO: add a keep-EM flag?
    Metadata.hierarchyOf(result).addChild(emresult);
    return result;
  }

  /**
   * Parameterization class.
   * 
   * @author Erich Schubert
   */
  public static class Par<V extends NumberVector> implements Parameterizer {
    /**
     * EM clustering algorithm to run.
     */
    protected EM<V, ?> em;

    @Override
    public void configure(Parameterization config) {
      Class<EM<V, ?>> cls = ClassGenericsUtil.uglyCastIntoSubclass(EM.class);
      em = config.tryInstantiate(cls);
    }

    @Override
    public EMOutlier<V> make() {
      return new EMOutlier<>(em);
    }
  }
}
